package com.goods.crawler.facade;

import com.goods.crawler.api.DTO.CrawlerRequestDTO;
import com.goods.crawler.api.VO.Category1NameUrlVO;
import com.goods.crawler.api.VO.Category1ToCategory2VO;
import com.goods.crawler.api.VO.Category2NameUrlVO;
import com.goods.crawler.api.VO.CategoryVO;
import com.goods.crawler.entity.Category;
import com.goods.crawler.service.CategoryService;
import com.goods.crawler.utils.WebDriverFactory;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * <pre>Class: CategoryFacade</pre>
 * <pre>Description: facade that crawls product categories</pre>
 * <pre>Copyright: School of Information, Zhejiang Sci-Tech University</pre>
 * <pre>Date: 2019/1/27 9:44</pre>
 * <pre>Author: chenwb</pre>
 */
@Service
public class CategoryFacade {

	private static final Logger logger = LoggerFactory.getLogger(CategoryFacade.class);

	/** Timeout, in seconds, passed to every {@link WebDriverWait} created by this class. */
	private static final int WAIT_TIMEOUT_SECONDS = 20;

	@Autowired
	private CategoryService categoryService;

	/**
	 * Crawls the Tmall product categories
	 * (https://m.tmall.com/mblist/category/index.html).
	 *
	 * <p>Opens the URL carried by the request, collects the category links found on that
	 * page, then visits every link and walks through its result pages, saving the
	 * categories found on each page through {@link CategoryService}.</p>
	 *
	 * <p>The browser driver that is current when the method ends is always quit, whether
	 * the crawl finishes normally or an exception escapes.</p>
	 *
	 * @param crawlerRequestDTO request holding the start URL
	 * @author chenwb
	 * @date 2019/1/27 17:40
	 */
	public void crawlerCategory(CrawlerRequestDTO crawlerRequestDTO) {
		// Obtain the browser driver.
		WebDriver driver = WebDriverFactory.getInstance();
		try {
			driver.get(crawlerRequestDTO.getUrl());
			this.waitForPageLoad(driver);
			CategoryVO categoryVO = this.getAllCategory(driver);
			for (Category1ToCategory2VO category1ToCategory2VO : categoryVO.getCategory2VOList()) {
				String category1Name = category1ToCategory2VO.getCategory1Name();
				for (Category2NameUrlVO category2NameUrlVO : category1ToCategory2VO.getCategory2NameUrlVOS()) {
					// Every category is crawled with a fresh driver: the previous one is
					// quit and a new one is requested from the factory.
					driver.quit();
					driver = WebDriverFactory.getInstance();
					driver.get(category2NameUrlVO.getCategory2Url());
					this.waitForPageLoad(driver);
					this.crawlAllPages(driver, category1Name, category2NameUrlVO.getCategory2Name());
				}
			}
		} finally {
			// Without this the last driver would never be released.
			this.quitQuietly(driver);
		}
		logger.info("success,商品种类爬取完毕哦！！！");
	}

	/**
	 * Crawls the page currently shown by the driver and then follows the pagination
	 * links until the last page has been processed.
	 *
	 * @param driver        driver positioned on the first result page of a category
	 * @param category1Name name stored as the first category level
	 * @param category2Name name stored as the second category level
	 */
	private void crawlAllPages(WebDriver driver, String category1Name, String category2Name) {
		int pageTotal;
		int pageNum = 0;
		boolean movedToNextPage;
		do {
			// Number of entries in the pagination list.
			pageTotal = this.getAllPages(driver).findElements(By.tagName("li")).size();
			// Number of the page currently displayed.
			try {
				pageNum = Integer.parseInt(this.getAllPages(driver).findElement(By.className("current")).getText());
			} catch (Exception e) {
				// Best effort: keep the previous value and rely on the navigation result
				// below to terminate the loop.
				logger.warn("当前页码获取或解析错误--", e);
			}
			this.crawlerOnePageOf3(driver, category1Name, category2Name);
			movedToNextPage = this.goToNextPage(driver);
			this.waitForPageLoad(driver);
			// Stop as soon as no navigation happened; otherwise the same page would be
			// crawled (and saved) again and again when the page number cannot be read.
		} while (movedToNextPage && pageNum < pageTotal);
	}

	/**
	 * Navigates to the pagination entry that follows the one marked {@code current}.
	 *
	 * @param driver driver positioned on a result page
	 * @return {@code true} if a following entry existed and was opened, {@code false} if
	 *         the current entry is the last one or no entry is marked as current
	 */
	private boolean goToNextPage(WebDriver driver) {
		List<WebElement> pageElements = this.getAllPages(driver).findElements(By.tagName("li"));
		// The last entry has no successor, so it is excluded from the search.
		for (int i = 0; i < pageElements.size() - 1; i++) {
			if ("current".equals(pageElements.get(i).getAttribute("class"))) {
				logger.info("下一页,第{}页", i + 1);
				driver.get(pageElements.get(i + 1).findElement(By.tagName("a")).getAttribute("href"));
				return true;
			}
		}
		return false;
	}

	/**
	 * Quits the given driver and logs, instead of propagating, any failure while doing so.
	 *
	 * @param driver driver to quit; may be {@code null}
	 */
	private void quitQuietly(WebDriver driver) {
		if (driver == null) {
			return;
		}
		try {
			driver.quit();
		} catch (Exception e) {
			logger.warn("Failed to quit WebDriver", e);
		}
	}

	/**
	 * Common page wait: blocks until an element with class {@code copyright} is present.
	 *
	 * @param driver driver whose page is being loaded
	 * @author chenwb
	 * @date 2019/1/27 17:41
	 */
	private void waitForPageLoad(WebDriver driver) {
		new WebDriverWait(driver, WAIT_TIMEOUT_SECONDS)
				.until(ExpectedConditions.presenceOfElementLocated(By.className("copyright")));
	}

	/**
	 * Returns the pagination node of the current page.
	 *
	 * <p>Waits until an element with class {@code wraper} is present and then looks up
	 * the element with class {@code items}.</p>
	 *
	 * @param driver driver positioned on a result page
	 * @return the element with class {@code items}
	 * @author chenwb
	 * @date 2019/1/27 17:41
	 */
	private WebElement getAllPages(WebDriver driver) {
		new WebDriverWait(driver, WAIT_TIMEOUT_SECONDS)
				.until(ExpectedConditions.presenceOfElementLocated(By.className("wraper")));
		return driver.findElement(By.className("items"));
	}

	/**
	 * Crawls the third-level categories listed on the current page and saves them.
	 *
	 * @param driver        driver positioned on a result page
	 * @param categoryName1 name stored as the first category level
	 * @param categoryName2 name stored as the second category level
	 * @author chenwb
	 * @date 2019/1/27 17:41
	 */
	private void crawlerOnePageOf3(WebDriver driver, String categoryName1, String categoryName2) {
		List<WebElement> elementsOfCategoryName3 = driver
				.findElement(By.cssSelector("[class='content clearfix']"))
				.findElement(By.cssSelector("[class='content clearfix']"))
				.findElement(By.cssSelector("[class='catlist-3 clearfix']"))
				.findElements(By.tagName("li"));
		List<Category> categories = new ArrayList<Category>();
		for (WebElement item : elementsOfCategoryName3) {
			String categoryName3 = item.findElement(By.tagName("a")).getAttribute("title");
			categories.add(new Category("", categoryName1, categoryName2, categoryName3));
		}
		categoryService.saveCategory(categories);
	}

	/**
	 * Looks up the first-level category elements (class {@code cat-1-li}).
	 *
	 * @param driver driver positioned on the category index page
	 * @return the matching elements; an empty list if the lookup fails, never {@code null}
	 */
	private List<WebElement> findLevel1Elements(WebDriver driver) {
		try {
			return driver.findElements(By.className("cat-1-li"));
		} catch (Exception e) {
			logger.info("一级类目未找到");
			// An empty list lets callers iterate safely instead of hitting a null.
			return new ArrayList<WebElement>();
		}
	}

	/**
	 * Collects every first-level category together with the names and URLs of its
	 * second-level categories. Can be somewhat slow.
	 *
	 * <p>First-level categories without a {@code catlist-2} element are skipped.</p>
	 *
	 * @param driver driver positioned on the category index page
	 * @return value object holding one entry per collected first-level category
	 * @author chenwb
	 * @date 2019/1/27 17:42
	 */
	private CategoryVO getAllCategory1(WebDriver driver) {
		List<WebElement> elements = this.findLevel1Elements(driver);
		// Holds one entry per first-level category.
		List<Category1ToCategory2VO> category1ToCategory2VOList = new ArrayList<Category1ToCategory2VO>();
		// Loop over the first-level categories.
		for (WebElement element : elements) {
			String categoryName1 = element.findElement(By.tagName("h1")).findElement(By.tagName("a")).getAttribute("title");
			List<WebElement> elementsOfCategoryName2;
			try {
				elementsOfCategoryName2 = element.findElement(By.className("catlist-2")).findElements(By.tagName("li"));
			} catch (Exception ignored) {
				// No second-level list under this category: nothing to collect.
				continue;
			}
			// Holds the second-level category names and URLs.
			List<Category2NameUrlVO> category2NameUrlVOList = new ArrayList<Category2NameUrlVO>();
			for (WebElement element1 : elementsOfCategoryName2) {
				WebElement anchor;
				try {
					anchor = element1.findElement(By.tagName("h2")).findElement(By.tagName("a"));
				} catch (Exception ignored) {
					// Fall back to the first link of the item when it has no h2 > a.
					anchor = element1.findElement(By.tagName("a"));
				}
				category2NameUrlVOList.add(
						new Category2NameUrlVO(anchor.getAttribute("title"), anchor.getAttribute("href")));
			}
			logger.info("{}搜集完毕", categoryName1);
			// A new value object per category; reusing one instance would make every
			// list entry point at the data of the last category.
			Category1ToCategory2VO category1ToCategory2VO = new Category1ToCategory2VO();
			category1ToCategory2VO.setCategory1Name(categoryName1);
			category1ToCategory2VO.setCategory2NameUrlVOS(category2NameUrlVOList);
			category1ToCategory2VOList.add(category1ToCategory2VO);
		}
		CategoryVO categoryVO = new CategoryVO();
		categoryVO.setCategory2VOList(category1ToCategory2VOList);
		logger.info("category1ToCategory2VOList的大小{}", category1ToCategory2VOList.size());
		return categoryVO;
	}

	/**
	 * Collects the name and URL of every first-level category.
	 *
	 * <p>All collected name/URL pairs are placed in a single
	 * {@link Category1ToCategory2VO} whose first-level name is the empty string.</p>
	 *
	 * @param driver driver positioned on the category index page
	 * @return value object whose list contains exactly that one entry
	 * @author chenwb
	 * @date 2019/1/27 17:42
	 */
	private CategoryVO getAllCategory(WebDriver driver) {
		List<WebElement> elements = this.findLevel1Elements(driver);

		// Loop over the first-level categories.
		List<Category2NameUrlVO> category2NameUrlVOList = new ArrayList<Category2NameUrlVO>();
		for (WebElement element : elements) {
			WebElement anchor = element.findElement(By.tagName("h1")).findElement(By.tagName("a"));
			category2NameUrlVOList.add(
					new Category2NameUrlVO(anchor.getAttribute("title"), anchor.getAttribute("href")));
		}
		List<Category1ToCategory2VO> category1ToCategory2VOList = new ArrayList<Category1ToCategory2VO>();
		category1ToCategory2VOList.add(new Category1ToCategory2VO("", category2NameUrlVOList));
		CategoryVO categoryVO = new CategoryVO();
		categoryVO.setCategory2VOList(category1ToCategory2VOList);
		logger.info("category1ToCategory2VOList的大小{}", category1ToCategory2VOList.size());
		return categoryVO;
	}
}
